Quality

Preamble

Dependencies

Code
library(dplyr)
library(tidyr)
library(scran)
library(scater)
library(ggplot2)
library(patchwork)
library(SpatialExperiment)

Loading

Code
dir <- file.path("..", "data", "Visium")
ids <- list.dirs(dir, recursive = FALSE)
ids <- ids[grep("^B[0-9]+", basename(ids))]
names(ids) <- gsub("_[0-9]*$", "", basename(ids))
spe <- read10xVisium(ids, type = "HDF5", images = "lowres")
spe$barcode <- colnames(spe); colnames(spe) <- 
    paste(spe$sample_id, spe$barcode, sep = ".")
Code
md <- read.csv(file.path(dir, "metadata", "md.csv"))
md <- md[!is.na(md$Used.for.10x), ]
md$sample_id <- gsub("-", "_", md$B.number)
idx <- match(spe$sample_id, md$sample_id)
table(spe$TumorType <- factor(md$Tumor.type[idx]))

ccRCC  LSCC  LUAD 
 9564 16010  2158 
Code
csv <- file.path(dir, "metadata", "labels.csv")
lab <- read.csv(csv, row.names = 1)
lab$TLS[lab$TLS == ""] <- NA
spe$label <- factor(NA, unique(lab$TLS), exclude = NULL)
idx <- split(seq(ncol(spe)), spe$sample_id)
lab <- split(lab, lab$Patient_ID)
for (. in names(ids)) {
    bcs <- gsub("-[0-9]$", "-1", lab[[.]]$Barcode)
    xdi <- match(spe[, idx[[.]]]$barcode, bcs)
    spe[, idx[[.]]]$label <- lab[[.]]$TLS
}
table(spe$sample_id, spe$label)
           
            <NA>  NOR  TUM INFL  TLS EXCL   LN
  B04_17776 1814 1664  585  141  161    0    0
  B05_32288 1869  574  355  136   80    0    0
  B05_8527  2584  330 1089   40   43   87    0
  B06_24137 1494 1137 1042   62  407    0   33
  B06_24784 1064 1126  718  178  138    0    0
  B07_30616 1372  266  491    6   32    0    0
  B07_38596 2270  493  593   27   96  977    0
  B15_11190 1568  160  364   39   27    0    0

Wrangling

Code
rd <- rowData(spe)
rd$ensembl <- rownames(spe)
rownames(spe) <- make.names(rd$symbol)
spatialCoordsNames(spe) <- c("x", "y")
spe$sample_id <- factor(spe$sample_id)
Code
spe
class: SpatialExperiment 
dim: 17878 27732 
metadata(0):
assays(1): counts
rownames(17878): SAMD11 NOC2L ... TMSB4Y KDM5D
rowData names(1): symbol
colnames(27732): B04_17776.AAACAACGAATAGTTC-1
  B04_17776.AAACAAGTATCTCCCA-1 ... B15_11190.TTGTTTCATTAGTCTA-1
  B15_11190.TTGTTTCCATACAACT-1
colData names(7): in_tissue array_row ... TumorType label
reducedDimNames(0):
mainExpName: NULL
altExpNames(0):
spatialCoords names(2) : x y
imgData names(4): sample_id image_id data scaleFactor

Exploratory

Code
# add gene-(sample-) & spot-level QC summaries 
sub <- split(seq(ncol(spe)), spe$sample_id)
spe <- addPerFeatureQC(spe, subsets = sub)
spe <- addPerCellQC(spe)
# get tables of gene & spot metadata
rd <- .df(spe, margin = 1)
cd <- .df(spe, margin = 2)

Genes

Code
dr <- rd %>% 
    select(matches("subsets.*detected")) %>% 
    pivot_longer(everything()) %>% 
    mutate(sample_id = gsub("subsets_(.*)_detected", "\\1", name))
ggplot(dr, aes(value, col = sample_id)) &
    labs(x = "% of cells with non-zero count") &
    geom_density(key_glyph = "point") & scale_x_sqrt() & 
    guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) &
    theme_bw() & theme(aspect.ratio = 2/3, 
        panel.grid.minor = element_blank(), 
        legend.key.size = unit(0.5, "lines")) 

Spots

Code
ggplot(cd, aes(total, col = sample_id)) +
ggplot(cd, aes(detected, col = sample_id)) +
ggplot(cd, aes(total, col = label)) +
ggplot(cd, aes(detected, col = label)) +
    plot_layout(nrow = 2, guides = "collect") &
    geom_density(key_glyph = "point") & scale_x_log10() & 
    guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) &
    theme_bw() & theme(legend.key.size = unit(0.5, "lines")) 

Code
cd %>% 
  group_by(sample_id) %>% 
  mutate(total = .scale01(log10(total))) %>% 
  ggplot(aes(x, y, col = total)) +
  geom_point(shape = 16, size = 1) +
  facet_wrap(~ sample_id, nrow = 2) +
  scale_color_viridis_c() +
  coord_equal() + theme_void() +
  theme(legend.position = "none")

Code
cd %>% 
  ggplot(aes(x, y, col = label)) +
  geom_point(shape = 16, size = 1) +
  facet_wrap(~ sample_id, nrow = 2) +
  coord_equal() + theme_void() +
  theme(legend.position = "bottom") +
  guides(col = guide_legend(nrow = 1, 
      override.aes = list(size = 3)))

Code
ns <- as.data.frame(table(sample_id = spe$sample_id, label = spe$label))
ggplot(ns, aes(label, Freq, fill = label)) +
    facet_wrap(~ sample_id, scales = "free_y", nrow = 2) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(y = "# spots") + theme_bw() + theme(
        panel.grid.minor = element_blank(),
        legend.key.size = unit(0.5, "lines"),
        axis.text.x = element_text(angle = 45, hjust = 1))

Code
ggplot(ns, aes(sample_id, Freq, fill = sample_id)) +
    facet_wrap(~ label, scales = "free_y", nrow = 2) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(y = "# spots") + theme_bw() + theme(
        panel.grid = element_blank(),
        legend.key.size = unit(0.5, "lines"),
        axis.text.x = element_text(angle = 45, hjust = 1))

Filtering

Code
# drop spots w/ overall low counts
table(cs <- !isOutlier(cd$total, 
    batch = cd$sample_id, 
    type = "lower",
    log = TRUE))

FALSE  TRUE 
  178 27554 
Code
# for each tumor type, get genes detected
# in at least 1% of cells in all samples
y <- t(as.matrix(counts(spe) > 0))
idx <- split(seq(ncol(spe)), spe$TumorType == "ccRCC")
det <- lapply(idx, \(.) {
    ids <- droplevels(spe$sample_id[.])
    det <- by(y[., ], ids, \(.) colnames(.)[colMeans(.) >= 0.01])
    Reduce(intersect, det)
})
sapply(det, length)
FALSE  TRUE 
 9491 10364 
Code
length(gs <- Reduce(intersect, det))
[1] 8701
Code
dim(spe <- spe[gs, cs])
[1]  8701 27554
Code
# drop spots labelled for exclusion
spe <- spe[, spe$label != "EXCL"]
spe$label <- droplevels(spe$label)
dim(spe)
[1]  8701 26490

Appendix

Code
saveRDS(spe, file.path("..", "outs", "01-spe.rds"))
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.2.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] SpatialExperiment_1.10.0    patchwork_1.1.2            
 [3] scater_1.28.0               ggplot2_3.4.2              
 [5] scran_1.28.1                scuttle_1.10.1             
 [7] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.1
 [9] Biobase_2.60.0              GenomicRanges_1.52.0       
[11] GenomeInfoDb_1.36.0         IRanges_2.34.0             
[13] S4Vectors_0.38.1            BiocGenerics_0.46.0        
[15] MatrixGenerics_1.12.0       matrixStats_0.63.0         
[17] tidyr_1.3.0                 dplyr_1.1.2                

loaded via a namespace (and not attached):
 [1] bitops_1.0-7              gridExtra_2.3            
 [3] rlang_1.1.1               magrittr_2.0.3           
 [5] compiler_4.3.0            DelayedMatrixStats_1.22.0
 [7] vctrs_0.6.2               pkgconfig_2.0.3          
 [9] crayon_1.5.2              fastmap_1.1.1            
[11] magick_2.7.4              XVector_0.40.0           
[13] utf8_1.2.3                rmarkdown_2.21           
[15] ggbeeswarm_0.7.2          purrr_1.0.1              
[17] xfun_0.39                 bluster_1.10.0           
[19] zlibbioc_1.46.0           beachmat_2.16.0          
[21] jsonlite_1.8.4            rhdf5filters_1.12.1      
[23] DelayedArray_0.26.2       Rhdf5lib_1.22.0          
[25] BiocParallel_1.33.11      irlba_2.3.5.1            
[27] parallel_4.3.0            cluster_2.1.4            
[29] R6_2.5.1                  limma_3.56.0             
[31] Rcpp_1.0.10               knitr_1.42               
[33] R.utils_2.12.2            Matrix_1.5-4             
[35] igraph_1.4.2              tidyselect_1.2.0         
[37] rstudioapi_0.14           yaml_2.3.7               
[39] viridis_0.6.3             codetools_0.2-19         
[41] lattice_0.21-8            tibble_3.2.1             
[43] withr_2.5.0               evaluate_0.21            
[45] pillar_1.9.0              generics_0.1.3           
[47] RCurl_1.98-1.12           sparseMatrixStats_1.12.0 
[49] munsell_0.5.0             scales_1.2.1             
[51] glue_1.6.2                metapod_1.8.0            
[53] tools_4.3.0               BiocNeighbors_1.18.0     
[55] ScaledMatrix_1.8.1        locfit_1.5-9.7           
[57] rhdf5_2.44.0              grid_4.3.0               
[59] DropletUtils_1.20.0       edgeR_3.42.2             
[61] colorspace_2.1-0          GenomeInfoDbData_1.2.10  
[63] beeswarm_0.4.0            BiocSingular_1.16.0      
[65] HDF5Array_1.28.1          vipor_0.4.5              
[67] cli_3.6.1                 rsvd_1.0.5               
[69] fansi_1.0.4               S4Arrays_1.0.1           
[71] viridisLite_0.4.2         gtable_0.3.3             
[73] R.methodsS3_1.8.2         digest_0.6.31            
[75] ggrepel_0.9.3             dqrng_0.3.0              
[77] rjson_0.2.21              htmlwidgets_1.6.2        
[79] htmltools_0.5.5           R.oo_1.25.0              
[81] lifecycle_1.0.3           statmod_1.5.0